{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 特征缩放"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "在机器学习中进行梯度下降的时候，如果不同维度数据取值范围相差过大，就有可能会导致梯度下降的效率不高。\n",
    "所以要进行特征缩放"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_table('./ex1data2.txt', header=None,delimiter=',', names=['size', 'num', 'price'])\n",
    "data_oright =data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "        size   num     price\n0   0.345284  0.50  0.433962\n1   0.206288  0.50  0.301887\n2   0.426917  0.50  0.375660\n3   0.155543  0.25  0.117170\n4   0.592388  0.75  0.698113\n5   0.312466  0.75  0.245283\n6   0.188086  0.50  0.273585\n7   0.158577  0.50  0.054904\n8   0.145615  0.50  0.079434\n9   0.177055  0.50  0.136981\n10  0.300055  0.75  0.132262\n11  0.316602  0.50  0.334151\n12  0.286266  0.50  0.302074\n13  1.000000  1.00  1.000000\n14  0.114727  0.50  0.169811\n15  0.399338  0.75  0.528302\n16  0.129068  0.25  0.245283\n17  0.105902  0.50  0.056604\n18  0.484556  0.75  0.622826\n19  0.600938  0.75  0.809623\n20  0.252344  0.50  0.156604\n21  0.285714  0.25  0.160566\n22  0.207391  0.50  0.137736\n23  0.306122  0.75  0.169811\n24  0.837838  0.50  0.762264\n25  0.068395  0.50  0.150943\n26  0.167126  0.50  0.555849\n27  0.461666  0.50  0.564340\n28  0.371760  0.50  0.575660\n29  0.492278  0.50  0.245283\n30  0.272201  0.25  0.339623\n31  0.040816  0.00  0.000000\n32  0.327634  0.75  0.273585\n33  0.630171  0.50  0.773585\n34  0.264479  0.75  0.218868\n35  0.161335  0.50  0.150943\n36  0.106729  0.50  0.113208\n37  0.353006  0.75  0.330377\n38  0.927468  0.75  0.715283\n39  0.361280  0.75  0.220943\n40  0.223938  0.25  0.374717\n41  0.382239  0.50  0.301887\n42  0.472973  0.75  0.271887\n43  0.095974  0.50  0.243585\n44  0.000000  0.25  0.018868\n45  0.275786  0.75  0.245283\n46  0.096801  0.50  0.131321",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>size</th>\n      <th>num</th>\n      <th>price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0.345284</td>\n      <td>0.50</td>\n      <td>0.433962</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>0.206288</td>\n      <td>0.50</td>\n      <td>0.301887</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>0.426917</td>\n      <td>0.50</td>\n      <td>0.375660</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>0.155543</td>\n      <td>0.25</td>\n      <td>0.117170</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>0.592388</td>\n      <td>0.75</td>\n      <td>0.698113</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>0.312466</td>\n      <td>0.75</td>\n      <td>0.245283</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>0.188086</td>\n      <td>0.50</td>\n      <td>0.273585</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>0.158577</td>\n      <td>0.50</td>\n      <td>0.054904</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>0.145615</td>\n      <td>0.50</td>\n      <td>0.079434</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>0.177055</td>\n      <td>0.50</td>\n      <td>0.136981</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>0.300055</td>\n      <td>0.75</td>\n      <td>0.132262</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>0.316602</td>\n      <td>0.50</td>\n      <td>0.334151</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>0.286266</td>\n      <td>0.50</td>\n      <td>0.302074</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>1.000000</td>\n      <td>1.00</td>\n      <td>1.000000</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>0.114727</td>\n      <td>0.50</td>\n      <td>0.169811</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>0.399338</td>\n      <td>0.75</td>\n      <td>0.528302</td>\n    </tr>\n    <tr>\n      <th>16</th>\n      <td>0.129068</td>\n      <td>0.25</td>\n      <td>0.245283</td>\n    </tr>\n    <tr>\n      <th>17</th>\n      <td>0.105902</td>\n      <td>0.50</td>\n      <td>0.056604</td>\n    </tr>\n    <tr>\n      <th>18</th>\n      <td>0.484556</td>\n      <td>0.75</td>\n      <td>0.622826</td>\n    </tr>\n    <tr>\n      <th>19</th>\n      <td>0.600938</td>\n      <td>0.75</td>\n      <td>0.809623</td>\n    </tr>\n    <tr>\n      <th>20</th>\n      <td>0.252344</td>\n      <td>0.50</td>\n      <td>0.156604</td>\n    </tr>\n    <tr>\n      <th>21</th>\n      <td>0.285714</td>\n      <td>0.25</td>\n      <td>0.160566</td>\n    </tr>\n    <tr>\n      <th>22</th>\n      <td>0.207391</td>\n      <td>0.50</td>\n      <td>0.137736</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>0.306122</td>\n      <td>0.75</td>\n      <td>0.169811</td>\n    </tr>\n    <tr>\n      <th>24</th>\n      <td>0.837838</td>\n      <td>0.50</td>\n      <td>0.762264</td>\n    </tr>\n    <tr>\n      <th>25</th>\n      <td>0.068395</td>\n      <td>0.50</td>\n      <td>0.150943</td>\n    </tr>\n    <tr>\n      <th>26</th>\n      <td>0.167126</td>\n      <td>0.50</td>\n      <td>0.555849</td>\n    </tr>\n    <tr>\n      <th>27</th>\n      <td>0.461666</td>\n      <td>0.50</td>\n      <td>0.564340</td>\n    </tr>\n    <tr>\n      <th>28</th>\n      <td>0.371760</td>\n      <td>0.50</td>\n      <td>0.575660</td>\n    </tr>\n    <tr>\n      <th>29</th>\n      <td>0.492278</td>\n      <td>0.50</td>\n      <td>0.245283</td>\n    </tr>\n    <tr>\n      <th>30</th>\n      <td>0.272201</td>\n      <td>0.25</td>\n      <td>0.339623</td>\n    </tr>\n    <tr>\n      <th>31</th>\n      <td>0.040816</td>\n      <td>0.00</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>32</th>\n      <td>0.327634</td>\n      <td>0.75</td>\n      <td>0.273585</td>\n    </tr>\n    <tr>\n      <th>33</th>\n      <td>0.630171</td>\n      <td>0.50</td>\n      <td>0.773585</td>\n    </tr>\n    <tr>\n      <th>34</th>\n      <td>0.264479</td>\n      <td>0.75</td>\n      <td>0.218868</td>\n    </tr>\n    <tr>\n      <th>35</th>\n      <td>0.161335</td>\n      <td>0.50</td>\n      <td>0.150943</td>\n    </tr>\n    <tr>\n      <th>36</th>\n      <td>0.106729</td>\n      <td>0.50</td>\n      <td>0.113208</td>\n    </tr>\n    <tr>\n      <th>37</th>\n      <td>0.353006</td>\n      <td>0.75</td>\n      <td>0.330377</td>\n    </tr>\n    <tr>\n      <th>38</th>\n      <td>0.927468</td>\n      <td>0.75</td>\n      <td>0.715283</td>\n    </tr>\n    <tr>\n      <th>39</th>\n      <td>0.361280</td>\n      <td>0.75</td>\n      <td>0.220943</td>\n    </tr>\n    <tr>\n      <th>40</th>\n      <td>0.223938</td>\n      <td>0.25</td>\n      <td>0.374717</td>\n    </tr>\n    <tr>\n      <th>41</th>\n      <td>0.382239</td>\n      <td>0.50</td>\n      <td>0.301887</td>\n    </tr>\n    <tr>\n      <th>42</th>\n      <td>0.472973</td>\n      <td>0.75</td>\n      <td>0.271887</td>\n    </tr>\n    <tr>\n      <th>43</th>\n      <td>0.095974</td>\n      <td>0.50</td>\n      <td>0.243585</td>\n    </tr>\n    <tr>\n      <th>44</th>\n      <td>0.000000</td>\n      <td>0.25</td>\n      <td>0.018868</td>\n    </tr>\n    <tr>\n      <th>45</th>\n      <td>0.275786</td>\n      <td>0.75</td>\n      <td>0.245283</td>\n    </tr>\n    <tr>\n      <th>46</th>\n      <td>0.096801</td>\n      <td>0.50</td>\n      <td>0.131321</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "source": [
    "#最大最小归一化\n",
    "for col in data.columns:\n",
    "    data[col] = (data[col]-data[col].min()) / (data[col].max() - data[col].min())\n",
    "\n",
    "data\n",
    "# X, Y= np.meshgrid(data['size'],data['num'])\n",
    "# Z = np.tile(data['price'],(47,1))\n",
    "# # plt.contour(X,Y,Z)\n",
    "# X,Y,Z\n",
    "# # data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}